{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "py=[]\n",
    "f =open('pinyin_list.txt')\n",
    "contents = f.readlines()\n",
    "for i in contents:\n",
    "    i = i.strip('\\n')\n",
    "    py.append(i)\n",
    "#将拼音列表读取到py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy.io.wavfile as wav\n",
    "from scipy.fftpack import fft\n",
    "\n",
    "\n",
    "# 获取信号的时频图\n",
    "def compute_fbank(file):\n",
    "\tx=np.linspace(0, 400 - 1, 400, dtype = np.int64)\n",
    "\tw = 0.54 - 0.46 * np.cos(2 * np.pi * (x) / (400 - 1) ) # 汉明窗\n",
    "\tfs, wavsignal = wav.read(file)\n",
    "\t# wav波形 加时间窗以及时移10ms\n",
    "\ttime_window = 25 # 单位ms\n",
    "\twindow_length = fs / 1000 * time_window # 计算窗长度的公式，目前全部为400固定值\n",
    "\twav_arr = np.array(wavsignal)\n",
    "\twav_length = len(wavsignal)\n",
    "\trange0_end = int(len(wavsignal)/fs*1000 - time_window) // 10 # 计算循环终止的位置，也就是最终生成的窗数\n",
    "# \tprint(range0_end)\n",
    "\tdata_input = np.zeros((range0_end, 200), dtype = np.float) # 用于存放最终的频率特征数据\n",
    "\tdata_line = np.zeros((1, 400), dtype = np.float)#窗口内的数据\n",
    "\tfor i in range(0, range0_end):\n",
    "\t\tp_start = i * 160#步长10ms所以\n",
    "\t\tp_end = p_start + 400#窗口长25ms\n",
    "\t\tdata_line = wav_arr[p_start:p_end]\t\n",
    "\t\tdata_line = data_line * w # 加窗\n",
    "\t\tdata_line = np.abs(fft(data_line))\n",
    "\t\tdata_input[i]=data_line[0:200] # 设置为400除以2的值（即200）是取一半数据，因为是对称的\n",
    "\tdata_input = np.log(data_input + 1)\n",
    "\t#data_input = data_input[::]\n",
    "\treturn data_input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将输入padding到可以让网络处理的大小\n",
    "def wav_padding(wav_data_lst):\n",
    "    wav_lens = [len(data) for data in wav_data_lst]\n",
    "    wav_max_len = max(wav_lens)\n",
    "    wav_lens = np.array([leng//8 for leng in wav_lens])\n",
    "    new_wav_data_lst = np.zeros((len(wav_data_lst), wav_max_len, 200, 1))\n",
    "    for i in range(len(wav_data_lst)):\n",
    "        new_wav_data_lst[i, :wav_data_lst[i].shape[0], :, 0] = wav_data_lst[i]\n",
    "    return new_wav_data_lst, wav_lens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义解码器\n",
    "def decode_ctc(num_result, num2word):\n",
    "\tresult = num_result[:, :, :]\n",
    "\tin_len = np.zeros((1), dtype = np.int32)\n",
    "\tin_len[0] = result.shape[1];\n",
    "\tr = K.ctc_decode(result, in_len, greedy = True, beam_width=10, top_paths=1)\n",
    "\tr1 = K.get_value(r[0][0])\n",
    "\tr1 = r1[0]\n",
    "\ttext = []\n",
    "\tfor i in r1:\n",
    "\t\ttext.append(num2word[i])\n",
    "\treturn r1, text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "wav_data_lst=[]\n",
    "fbank = compute_fbank('test.wav')\n",
    "fbank = fbank[:fbank.shape[0] // 8 * 8, :]\n",
    "wav_data_lst.append(fbank)\n",
    "inputdata,outher = wav_padding(wav_data_lst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import keras \n",
    "from keras import backend as K\n",
    "with open('model_def.json') as ff:\n",
    "    model_json=ff.read()\n",
    "    model=keras.models.model_from_json(model_json)\n",
    "model.load_weights('res2.h5')\n",
    "\n",
    "preds=model.predict(inputdata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[198 199 144 170 200 201 202  28 115 203 203 172 204 205 172  53 106 206\n",
      "  71  11 207  22 208 156 141 209 110 210 133 254] ['ping2', 'ding4', 'xian4', 'huai2', 'shu4', 'pu1', 'xiang1', 'xin1', 'guan1', 'cun1', 'cun1', 'min2', 'wang2', 'pei2', 'min2', 'yi4', 'jia1', 'liu4', 'kou3', 'ren2', 'yong1', 'you3', 'liang3', 'zuo4', 'xiao3', 'yuan4', 'ba1', 'yan3', 'yao2', 'er2']\n"
     ]
    }
   ],
   "source": [
    "result, text = decode_ctc(preds, py)\n",
    "\n",
    "print(result,text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "语音转文字结果：\n",
      " 评定现怀数扑乡心关村村民王培民一家六口人拥有两座小院吧眼陶而\n"
     ]
    }
   ],
   "source": [
    "from LanguageModel2 import ModelLanguage\n",
    "ml = ModelLanguage('model_language')\n",
    "ml.LoadModel()\n",
    "str_pinyin = text\n",
    "r = ml.SpeechToText(str_pinyin)\n",
    "print('语音转文字结果：\\n',r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
